Fix localhost live migration. We were overvigorously wiping out the store
authoremellor@ewan <emellor@ewan>
Tue, 4 Oct 2005 10:14:50 +0000 (11:14 +0100)
committeremellor@ewan <emellor@ewan>
Tue, 4 Oct 2005 10:14:50 +0000 (11:14 +0100)
entries when a domain closed and on save, which meant that the /vm entries
disappeared when a localhost migration occurred.  XendCheckpoint has had extra
exception handling and logging added.  It also now calls back through
XendDomain.restore_,which has the correct locking semantics to prevent race
conditions during migration.

Added assertions to XendCheckpoint to ensure that the channels are set after
XendDomainInfo.restore.  I don't see why they would not be, and the old code
meant that in the case that they were not, IntroduceDomain would not be called
on the new domain, breaking Xend restart.

relocate calls through XendDomain.domain_restore_fd rather than directly to
XendCheckpoint to isolate XendCheckpoint from the rest of the world, and to
allow XendDomain to pass itself into XendCheckpoint for a callback.

Simplify the XendCheckpoint / XendDomainInfo interlock, giving only two
states, OK and TERMINATED.  If XendCheckpoint asks for a suspend, but sees a
shutdown, it is valid for it to proceed -- either way the domain has stopped.
Higher level tools may wish to disallow this, but at the very least, there is
no sense in waiting for a suspend that will never come.

Signed-off-by: Ewan Mellor <ewan@xensource.com>
tools/python/xen/xend/XendCheckpoint.py
tools/python/xen/xend/XendDomain.py
tools/python/xen/xend/XendDomainInfo.py
tools/python/xen/xend/server/relocate.py

index 9c38fab5b1d3781eaf8d2732f96ab73e458231ae..38e53707253ed541273ec2e279cb230072c73fd8 100644 (file)
@@ -1,4 +1,5 @@
 # Copyright (C) 2005 Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
+# Copyright (C) 2005 XenSource Ltd
 
 # This file is subject to the terms and conditions of the GNU General
 # Public License.  See the file "COPYING" in the main directory of
@@ -15,7 +16,6 @@ from xen.util.xpopen import xPopen3
 
 import xen.lowlevel.xc
 
-import XendDomainInfo
 from xen.xend.xenstore.xsutil import IntroduceDomain
 
 from XendError import XendError
@@ -42,58 +42,75 @@ def read_exact(fd, size, errmsg):
         raise XendError(errmsg)
     return buf
 
-def save(xd, fd, dominfo, live):
+def save(fd, dominfo, live):
     write_exact(fd, SIGNATURE, "could not write guest state file: signature")
 
     config = sxp.to_string(dominfo.sxpr())
-    write_exact(fd, pack("!i", len(config)),
-                "could not write guest state file: config len")
-    write_exact(fd, config, "could not write guest state file: config")
-
-    # xc_save takes three customization parameters: maxit, max_f, and flags
-    # the last controls whether or not save is 'live', while the first two
-    # further customize behaviour when 'live' save is enabled. Passing "0"
-    # simply uses the defaults compiled into libxenguest; see the comments 
-    # and/or code in xc_linux_save() for more information. 
-    cmd = [PATH_XC_SAVE, str(xc.handle()), str(fd),
-           str(dominfo.getDomid()), "0", "0", str(int(live)) ]
-    log.info("[xc_save] " + join(cmd))
-    child = xPopen3(cmd, True, -1, [fd, xc.handle()])
+
+    domain_name = dominfo.getName()
+
+    if live:
+        dominfo.setName('migrating-' + domain_name)
+
+    try:
+        write_exact(fd, pack("!i", len(config)),
+                    "could not write guest state file: config len")
+        write_exact(fd, config, "could not write guest state file: config")
+
+        # xc_save takes three customization parameters: maxit, max_f, and
+        # flags the last controls whether or not save is 'live', while the
+        # first two further customize behaviour when 'live' save is
+        # enabled. Passing "0" simply uses the defaults compiled into
+        # libxenguest; see the comments and/or code in xc_linux_save() for
+        # more information.
+        cmd = [PATH_XC_SAVE, str(xc.handle()), str(fd),
+               str(dominfo.getDomid()), "0", "0", str(int(live)) ]
+        log.info("[xc_save] " + join(cmd))
+        child = xPopen3(cmd, True, -1, [fd, xc.handle()])
     
-    lasterr = ""
-    p = select.poll()
-    p.register(child.fromchild.fileno())
-    p.register(child.childerr.fileno())
-    while True: 
-        r = p.poll()
-        for (fd, event) in r:
-            if not event & select.POLLIN:
-                continue
-            if fd == child.childerr.fileno():
-                l = child.childerr.readline()
-                log.error(l.rstrip())
-                lasterr = l.rstrip()
-            if fd == child.fromchild.fileno():
-                l = child.fromchild.readline()
-                if l.rstrip() == "suspend":
-                    log.info("suspending %d" % dominfo.getDomid())
-                    xd.domain_shutdown(dominfo.getDomid(), reason='suspend')
-                    dominfo.state_wait(XendDomainInfo.STATE_VM_SUSPENDED)
-                    log.info("suspend %d done" % dominfo.getDomid())
-                    child.tochild.write("done\n")
-                    child.tochild.flush()
-        if filter(lambda (fd, event): event & select.POLLHUP, r):
-            break
-
-    if child.wait() >> 8 == 127:
-        lasterr = "popen %s failed" % PATH_XC_SAVE
-    if child.wait() != 0:
-        raise XendError("xc_save failed: %s" % lasterr)
-
-    dominfo.destroy()
-    return None
-
-def restore(fd):
+        lasterr = ""
+        p = select.poll()
+        p.register(child.fromchild.fileno())
+        p.register(child.childerr.fileno())
+        while True: 
+            r = p.poll()
+            for (fd, event) in r:
+                if not event & select.POLLIN:
+                    continue
+                if fd == child.childerr.fileno():
+                    l = child.childerr.readline()
+                    log.error(l.rstrip())
+                    lasterr = l.rstrip()
+                if fd == child.fromchild.fileno():
+                    l = child.fromchild.readline()
+                    if l.rstrip() == "suspend":
+                        log.info("suspending %d", dominfo.getDomid())
+                        dominfo.shutdown('suspend')
+                        dominfo.waitForShutdown()
+                        log.info("suspend %d done", dominfo.getDomid())
+                        child.tochild.write("done\n")
+                        child.tochild.flush()
+            if filter(lambda (fd, event): event & select.POLLHUP, r):
+                break
+
+        if child.wait() >> 8 == 127:
+            lasterr = "popen %s failed" % PATH_XC_SAVE
+        if child.wait() != 0:
+            raise XendError("xc_save failed: %s" % lasterr)
+
+        dominfo.destroyDomain()
+    except Exception, exn:
+        log.exception("Save failed on domain %s (%d).", domain_name,
+                      dominfo.getDomid())
+        try:
+            if live:
+                dominfo.setName(domain_name)
+        except:
+            log.exception("Failed to reset the migrating domain's name")
+        raise Exception, exn
+
+
+def restore(xd, fd):
     signature = read_exact(fd, len(SIGNATURE),
         "not a valid guest state file: signature read")
     if signature != SIGNATURE:
@@ -112,71 +129,72 @@ def restore(fd):
         raise XendError("not a valid guest state file: config parse")
 
     vmconfig = p.get_val()
-    dominfo = XendDomainInfo.restore(vmconfig)
 
-    l = read_exact(fd, sizeof_unsigned_long,
-                   "not a valid guest state file: pfn count read")
-    nr_pfns = unpack("=L", l)[0]   # XXX endianess
-    if nr_pfns > 1024*1024:     # XXX
-        raise XendError(
-            "not a valid guest state file: pfn count out of range")
+    dominfo = xd.restore_(vmconfig)
 
-    if dominfo.store_channel:
-        store_evtchn = dominfo.store_channel.port2
-    else:
-        store_evtchn = 0
+    assert dominfo.store_channel
+    assert dominfo.console_channel
+
+    try:
+        l = read_exact(fd, sizeof_unsigned_long,
+                       "not a valid guest state file: pfn count read")
+        nr_pfns = unpack("=L", l)[0]   # XXX endianess
+        if nr_pfns > 1024*1024:     # XXX
+            raise XendError(
+                "not a valid guest state file: pfn count out of range")
 
-    if dominfo.console_channel:
+        store_evtchn = dominfo.store_channel.port2
         console_evtchn = dominfo.console_channel.port2
-    else:
-        console_evtchn = 0
-
-    cmd = [PATH_XC_RESTORE, str(xc.handle()), str(fd),
-           str(dominfo.getDomid()), str(nr_pfns),
-           str(store_evtchn), str(console_evtchn)]
-    log.info("[xc_restore] " + join(cmd))
-    child = xPopen3(cmd, True, -1, [fd, xc.handle()])
-    child.tochild.close()
-
-    lasterr = ""
-    p = select.poll()
-    p.register(child.fromchild.fileno())
-    p.register(child.childerr.fileno())
-    while True:
-        r = p.poll()
-        for (fd, event) in r:
-            if not event & select.POLLIN:
-                continue
-            if fd == child.childerr.fileno():
-                l = child.childerr.readline()
-                log.error(l.rstrip())
-                lasterr = l.rstrip()
-            if fd == child.fromchild.fileno():
-                l = child.fromchild.readline()
-                while l:
-                    log.info(l.rstrip())
-                    m = re.match(r"^(store-mfn) (\d+)\n$", l)
-                    if m:
-                        if dominfo.store_channel:
+
+        cmd = [PATH_XC_RESTORE, str(xc.handle()), str(fd),
+               str(dominfo.getDomid()), str(nr_pfns),
+               str(store_evtchn), str(console_evtchn)]
+        log.info("[xc_restore] " + join(cmd))
+        child = xPopen3(cmd, True, -1, [fd, xc.handle()])
+        child.tochild.close()
+
+        lasterr = ""
+        p = select.poll()
+        p.register(child.fromchild.fileno())
+        p.register(child.childerr.fileno())
+        while True:
+            r = p.poll()
+            for (fd, event) in r:
+                if not event & select.POLLIN:
+                    continue
+                if fd == child.childerr.fileno():
+                    l = child.childerr.readline()
+                    log.error(l.rstrip())
+                    lasterr = l.rstrip()
+                if fd == child.fromchild.fileno():
+                    l = child.fromchild.readline()
+                    while l:
+                        log.info(l.rstrip())
+                        m = re.match(r"^(store-mfn) (\d+)\n$", l)
+                        if m:
                             store_mfn = int(m.group(2))
                             dominfo.setStoreRef(store_mfn)
                             IntroduceDomain(dominfo.getDomid(),
                                             store_mfn,
                                             dominfo.store_channel.port1,
                                             dominfo.getDomainPath())
-                    m = re.match(r"^(console-mfn) (\d+)\n$", l)
-                    if m:
-                        dominfo.setConsoleRef(int(m.group(2)))
-                    try:
-                        l = child.fromchild.readline()
-                    except:
-                        l = None
-        if filter(lambda (fd, event): event & select.POLLHUP, r):
-            break
-
-    if child.wait() >> 8 == 127:
-        lasterr = "popen %s failed" % PATH_XC_RESTORE
-    if child.wait() != 0:
-        raise XendError("xc_restore failed: %s" % lasterr)
-
-    return dominfo
+                        m = re.match(r"^(console-mfn) (\d+)\n$", l)
+                        if m:
+                            dominfo.setConsoleRef(int(m.group(2)))
+                        try:
+                            l = child.fromchild.readline()
+                        except:
+                            l = None
+            if filter(lambda (fd, event): event & select.POLLHUP, r):
+                break
+
+        if child.wait() >> 8 == 127:
+            lasterr = "popen %s failed" % PATH_XC_RESTORE
+        if child.wait() != 0:
+            raise XendError("xc_restore failed: %s" % lasterr)
+
+        return dominfo
+    except:
+        log.exception("Restore failed")
+        dominfo.destroy()
+        raise
index f7da646e40778fd672c9b5e249d40e374e215acc..69a9917b1f25af6f17a8c9111f10571f5048e655 100644 (file)
@@ -239,14 +239,42 @@ class XendDomain:
         """
 
         try:
-            fd = os.open(src, os.O_RDONLY)
-            dominfo = XendCheckpoint.restore(fd)
-            self._add_domain(dominfo)
-            return dominfo
+            return self.domain_restore_fd(os.open(src, os.O_RDONLY))
         except OSError, ex:
             raise XendError("can't read guest state file %s: %s" %
                             (src, ex[1]))
 
+    def domain_restore_fd(self, fd):
+        """Restore a domain from the given file descriptor."""
+
+        try:
+            XendCheckpoint.restore(self, fd)
+        except Exception, ex:
+            log.exception("Restore failed")
+            raise
+
+
+    def restore_(self, config):
+        """Create a domain as part of the restore process.  This is called
+        only from {@link XendCheckpoint}.
+
+        A restore request comes into XendDomain through {@link
+        #domain_restore} or {@link #domain_restore_fd}.  That request is
+        forwarded immediately to XendCheckpoint which, when it is ready, will
+        call this method.  It is necessary to come through here rather than go
+        directly to {@link XendDomainInfo.restore} because we need to
+        serialise the domain creation process, but cannot lock
+        domain_restore_fd as a whole, otherwise we will deadlock waiting for
+        the old domain to die.
+        """
+        self.domains_lock.acquire()
+        try:
+            dominfo = XendDomainInfo.restore(config)
+            self._add_domain(dominfo)
+            return dominfo
+        finally:
+            self.domains_lock.release()
+
 
     def domain_lookup(self, id):
         self.domains_lock.acquire()
@@ -384,19 +412,8 @@ class XendDomain:
         port = xroot.get_xend_relocation_port()
         sock = relocate.setupRelocation(dst, port)
 
-        # temporarily rename domain for localhost migration
-        if dst == "localhost":
-            dominfo.setName("tmp-" + dominfo.getName())
-
-        try:
-            XendCheckpoint.save(self, sock.fileno(), dominfo, live)
-        except:
-            if dst == "localhost":
-                dominfo.setName(
-                    string.replace(dominfo.getName(), "tmp-", "", 1))
-            raise
+        XendCheckpoint.save(sock.fileno(), dominfo, live)
         
-        return None
 
     def domain_save(self, id, dst):
         """Start saving a domain to file.
@@ -411,7 +428,7 @@ class XendDomain:
             fd = os.open(dst, os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
 
             # For now we don't support 'live checkpoint' 
-            return XendCheckpoint.save(self, fd, dominfo, False)
+            return XendCheckpoint.save(fd, dominfo, False)
 
         except OSError, ex:
             raise XendError("can't write guest state file %s: %s" %
index 45470888560c879bbf347ec8bd1c93b18974c12d..5eaa0b71fa1db10032f56feb87de49bede6d15eb 100644 (file)
@@ -80,7 +80,6 @@ restart_modes = [
 
 STATE_VM_OK         = "ok"
 STATE_VM_TERMINATED = "terminated"
-STATE_VM_SUSPENDED  = "suspended"
 
 """Flag for a block device backend domain."""
 SIF_BLK_BE_DOMAIN = (1<<4)
@@ -624,21 +623,22 @@ class XendDomainInfo:
                     # The domain no longer exists.  This will occur if we have
                     # scheduled a timer to check for shutdown timeouts and the
                     # shutdown succeeded.  It will also occur if someone
-                    # destroys a domain beneath us.  We clean up, just in
-                    # case.
+                    # destroys a domain beneath us.  We clean up the domain,
+                    # just in case, but we can't clean up the VM, because that
+                    # VM may have migrated to a different domain on this
+                    # machine.
                     self.cleanupDomain()
-                    self.cleanupVm()
                     return
 
             if xeninfo['dying']:
                 # Dying means that a domain has been destroyed, but has not
-                # yet been cleaned up by Xen.  This could persist indefinitely
-                # if, for example, another domain has some of its pages
-                # mapped.  We might like to diagnose this problem in the
-                # future, but for now all we do is make sure that it's not
-                # us holding the pages, by calling the cleanup methods.
+                # yet been cleaned up by Xen.  This state could persist
+                # indefinitely if, for example, another domain has some of its
+                # pages mapped.  We might like to diagnose this problem in the
+                # future, but for now all we do is make sure that it's not us
+                # holding the pages, by calling cleanupDomain.  We can't
+                # clean up the VM, as above.
                 self.cleanupDomain()
-                self.cleanupVm()
                 return
 
             elif xeninfo['crashed']:
@@ -651,10 +651,11 @@ class XendDomainInfo:
                 restart_reason = 'crash'
 
             elif xeninfo['shutdown']:
-                if self.readDom('xend/shutdown'):
+                if self.readDom('xend/shutdown_completed'):
                     # We've seen this shutdown already, but we are preserving
                     # the domain for debugging.  Leave it alone.
-                    pass
+                    return
+
                 else:
                     reason = shutdown_reason(xeninfo['shutdown_reason'])
 
@@ -664,7 +665,7 @@ class XendDomainInfo:
                     self.clearRestart()
 
                     if reason == 'suspend':
-                        self.state_set(STATE_VM_SUSPENDED)
+                        self.state_set(STATE_VM_TERMINATED)
                         # Don't destroy the domain.  XendCheckpoint will do
                         # this once it has finished.
                     elif reason in ['poweroff', 'reboot']:
@@ -701,7 +702,7 @@ class XendDomainInfo:
         if not reason in shutdown_reasons.values():
             raise XendError('invalid reason:' + reason)
         self.storeDom("control/shutdown", reason)
-        if not reason == 'suspend':
+        if reason != 'suspend':
             self.storeDom('xend/shutdown_start_time', time.time())
 
 
@@ -720,11 +721,6 @@ class XendDomainInfo:
          "rename-restart" : self.renameRestart}[self.info['on_' + reason]]()
 
 
-    def preserve(self):
-        log.info("Preserving dead domain %s (%d).", self.info['name'],
-                 self.domid)
-
-
     def renameRestart(self):
         self.restart(True)
 
@@ -814,9 +810,9 @@ class XendDomainInfo:
 
     ## public:
 
-    def state_wait(self, state):
+    def waitForShutdown(self):
         self.state_updated.acquire()
-        while self.state != state:
+        while self.state == STATE_VM_OK:
             self.state_updated.wait()
         self.state_updated.release()
 
@@ -1054,7 +1050,6 @@ class XendDomainInfo:
         """Cleanup domain resources; release devices.  Idempotent.  Nothrow
         guarantee."""
 
-        self.state_set(STATE_VM_TERMINATED)
         self.release_devices()
         self.closeStoreChannel()
         self.closeConsoleChannel()
@@ -1087,8 +1082,14 @@ class XendDomainInfo:
 
         log.debug("XendDomainInfo.destroy: domid=%s", str(self.domid))
 
-        self.cleanupDomain()
         self.cleanupVm()
+        self.destroyDomain()
+
+
+    def destroyDomain(self):
+        log.debug("XendDomainInfo.destroyDomain(%s)", str(self.domid))
+
+        self.cleanupDomain()
         
         try:
             if self.domid is not None:
@@ -1096,6 +1097,8 @@ class XendDomainInfo:
         except Exception:
             log.exception("XendDomainInfo.destroy: xc.domain_destroy failed.")
 
+        self.state_set(STATE_VM_TERMINATED)
+
 
     ## private:
 
@@ -1243,14 +1246,18 @@ class XendDomainInfo:
 
         try:
             if rename:
-                self.preserveShutdownDomain()
+                self.preserveForRestart()
             else:
-                self.cleanupDomain()
                 self.destroy()
                 
             try:
                 xd = get_component('xen.xend.XendDomain')
-                xd.domain_unpause(xd.domain_create(config).getDomid())
+                new_dom = xd.domain_create(config)
+                try:
+                    xc.domain_unpause(new_dom.getDomid())
+                except:
+                    new_dom.destroy()
+                    raise
             except Exception, exn:
                 log.exception('Failed to restart domain %d.', self.domid)
         finally:
@@ -1260,7 +1267,7 @@ class XendDomainInfo:
         #        self.exportToDB()
 
 
-    def preserveShutdownDomain(self):
+    def preserveForRestart(self):
         """Preserve a domain that has been shut down, by giving it a new UUID,
         cloning the VM details, and giving it a new name.  This allows us to
         keep this domain for debugging, but restart a new one in its place
@@ -1276,8 +1283,14 @@ class XendDomainInfo:
         self.uuid = new_uuid
         self.vmpath = VMROOT + new_uuid
         self.storeVmDetails()
-        self.storeDom('vm', self.vmpath)
-        self.storeDom('xend/shutdown', 'True')
+        self.preserve()
+
+
+    def preserve(self):
+        log.info("Preserving dead domain %s (%d).", self.info['name'],
+                 self.domid)
+        self.storeDom('xend/shutdown_completed', 'True')
+        self.set_state(STATE_VM_TERMINATED)
 
 
     def generateShutdownName(self):
index 610c459af2dab2daa368db54182523ac6133438b..42d372040992bdc0c92b0c59ddc0e8e78441ae82 100644 (file)
@@ -28,7 +28,6 @@ from xen.xend import EventServer
 from xen.xend.XendError import XendError
 from xen.xend import XendRoot
 from xen.xend.XendLogging import log
-from xen.xend import XendCheckpoint
 
 
 eserver = EventServer.instance()
@@ -120,7 +119,8 @@ class RelocationProtocol(protocol.Protocol):
         if self.transport:
             self.send_reply(["ready", name])
             self.transport.sock.setblocking(1)
-            XendCheckpoint.restore(self.transport.sock.fileno())
+            xd = xroot.get_component("xen.xend.XendDomain")
+            xd.domain_restore_fd(self.transport.sock.fileno())
             self.transport.sock.setblocking(0)
         else:
             log.error(name + ": no transport")